STATS 506 - Homework 5

Author

Vincent Weng

Problem 1 - OOP Programming

Part (A) - For the rational class, define the following:

  1. A constructor
  2. A validator that ensures the denominator is non-zero.
  3. A show method.
  4. A simplify method, to obtain the simplest form (e.g. simplify(2/4) produces 1/2).
  5. A quotient method (e.g. quotient(3/7) produces .42857143…). It should support a digits argument but only in the printing, not the returned result (Hint: what does print return?).6. Addition, subtraction, multiplication, division. These should all return a rational.
  6. You’ll (probably) need GCD and LCM as part of some of these calculations; include these functions using Rcpp. Even if you don’t need these functions for another calculation, include them.
##' @title GCD Function
##' @param a and b
##' @return gcd output
gcd <- function(a, b) {
  while (b != 0) {
    t <- b
    b <- a %% b
    a <- t
  }
  abs(a)
}

# define rational S4 class
setClass("Rational",
         slots = list(numerator = "integer", denominator = "integer"),
         validity = function(object) {
           # check if denominator is zero
           if (object@denominator == 0) {
             stop("Denominator cannot be zero.")
           }
           # ensure both numerator and denominator are int
           if (!is.integer(object@numerator) || !is.integer(object@denominator)) {
             stop("Both numerator and denominator must be integers.")
           }
           TRUE
         })

# constructor function with validation
Rational <- function(numerator, denominator) {
  
  # check if numerator is a string and can be converted to an integer
  if (is.character(numerator)) {
    # attempt to convert to integer and check if the result is not NA
    converted_numerator <- suppressWarnings(as.integer(numerator))
    if (!is.na(converted_numerator)) {
      numerator <- converted_numerator
    } else {
      stop("Numerator must be convertible to an integer.")
    }
  }
  
  # check if denominator is a string and can be converted to an integer
  if (is.character(denominator)) {
    # attempt to convert to integer and check if the result is not NA
    converted_denominator <- suppressWarnings(as.integer(denominator))
    if (!is.na(converted_denominator)) {
      denominator <- converted_denominator
    } else {
      stop("Denominator must be convertible to an integer.")
    }
  }

  
  # checks that a vector of size 2 can be passed in
  if (is.numeric(numerator) && length(numerator) == 2) {
    denominator <- numerator[2]
    numerator <- numerator[1]
  }
  
  
  # ensure numerator and denominator are numeric and convert to int
  if (!is.numeric(numerator) || !is.numeric(denominator)) {
    stop("Both numerator and denominator must be numeric values.")
  }
  if (denominator == 0) {
    stop("Denominator cannot be zero.")
  }
  
  # convert to int
  numerator <- as.integer(numerator)
  denominator <- as.integer(denominator)
  
  # create new Rational object
  new("Rational", numerator = numerator, denominator = denominator)
}

# show method
setMethod("show", "Rational", function(object) {
  cat(object@numerator, "/", object@denominator, "\n")
})

# simplify method
setGeneric("simplify", function(object) standardGeneric("simplify"))
[1] "simplify"
setMethod("simplify", "Rational", function(object) {
  gcd_val <- gcd(object@numerator, object@denominator)
  object@numerator <- as.integer(object@numerator / gcd_val)
  object@denominator <- as.integer(object@denominator / gcd_val)
  object
})

# quotient method with digits argument
setGeneric("quotient", function(object, digits = 6) standardGeneric("quotient"))
[1] "quotient"
setMethod("quotient", "Rational", function(object, digits = 6) {
  round(as.numeric(object@numerator) / as.numeric(object@denominator), digits)
})

# arithmetic operations
setMethod("+", c("Rational", "Rational"), function(e1, e2) {
  denom <- as.integer(e1@denominator * e2@denominator)
  numer <- as.integer(e1@numerator * e2@denominator + e2@numerator * e1@denominator)
  simplify(Rational(numer, denom))
})

setMethod("-", c("Rational", "Rational"), function(e1, e2) {
  denom <- as.integer(e1@denominator * e2@denominator)
  numer <- as.integer(e1@numerator * e2@denominator - e2@numerator * e1@denominator)
  simplify(Rational(numer, denom))
})

setMethod("*", c("Rational", "Rational"), function(e1, e2) {
  simplify(Rational(as.integer(e1@numerator * e2@numerator), as.integer(e1@denominator * e2@denominator)))
})

setMethod("/", c("Rational", "Rational"), function(e1, e2) {
  if (e2@numerator == 0) stop("Division by zero.")
  simplify(Rational(as.integer(e1@numerator * e2@denominator), as.integer(e1@denominator * e2@numerator)))
})

Part (B) - Use your rational class to create three objects:

r1: 24/6 r2: 7/230 r3: 0/40

# defining r1, r2, and r3
r1 <- Rational(24, 6)
r2 <- Rational(7, 230)
r3 <- Rational(0, 4)

# list of expressions to evaluate with tryCatch
expressions <- list(
  quote(r1),
  quote(r3),
  quote(r1 + r2),
  quote(r1 - r2),
  quote(r1 * r2),
  quote(r1 / r2),
  quote(r1 + r3),
  quote(r1 * r3),
  quote(r2 / r3),
  quote(quotient(r1)),
  quote(quotient(r2)),
  quote(quotient(r2, digits = 3)),
  quote(quotient(r2, digits = 3.14)),
  quote(quotient(r2, digits = "avocado")),
  quote(q2 <- quotient(r2, digits = 3)),
  quote(q2),
  quote(quotient(r3)),
  quote(simplify(r1)),
  quote(simplify(r2)),
  quote(simplify(r3))
)

# loop through each expression and evaluate with tryCatch
for (expr in expressions) {
  tryCatch({
    result <- eval(expr) # evaluate the expression
    print(result)        # print result if successful
  }, error = function(e) {
    cat("Error:", e$message, "\n") # print error message if error
  })
}
24 / 6 
0 / 4 
927 / 230 
913 / 230 
14 / 115 
920 / 7 
4 / 1 
0 / 1 
Error: Division by zero. 
[1] 4
[1] 0.030435
[1] 0.03
[1] 0.03
Error: non-numeric argument to mathematical function 
[1] 0.03
[1] 0.03
[1] 0
4 / 1 
7 / 230 
0 / 1 

Part(C) - Show that your validator does not allow the creation of rational’s with 0 denominator, and check other malformed input to your constructor.

# valid example: should succeed
tryCatch({
  r1 <- Rational(24, 6)
  print(r1)
}, error = function(e) {
  cat("Error:", e$message, "\n")
})
24 / 6 
# invalid example: denominator is zero
tryCatch({
  r2 <- Rational(3, 0)
  print(r2)
}, error = function(e) {
  cat("Error:", e$message, "\n") # expected message: "Denominator cannot be zero."
})
Error: Denominator cannot be zero. 
# invalid example: non-numeric input
tryCatch({
  r3 <- Rational("a", 4)
  print(r3)
}, error = function(e) {
  cat("Error:", e$message, "\n") # expected message: "Both numerator and denominator must be numeric values."
})
Error: Numerator must be convertible to an integer. 
# invalid example: non-integer numeric input (will convert to integer if handled in Rational)
tryCatch({
  r4 <- Rational(3.5, 2.5)
  print(r4)
}, error = function(e) {
  cat("Error:", e$message, "\n")
})
3 / 2 
# valid example: str inputs that can be converted to int will be converted to make the Rational object
tryCatch({
  r4 <- Rational("10", "4")
  print(r4)
}, error = function(e) {
  cat("Error:", e$message, "\n")
})
10 / 4 

As shown above, the validator does not allow the creation of rationals with 0 denominator. Furthermore, it also checks that numerator and denominator are numeric inputs. Additionally, it also converts string inputs to integer if it is flexible, adding additional flexibility to the different inputs. Finally, it also checks that if floats are convertible to ints and converts it make a rational number.

Problem 2 - plotly

Let’s revisit the art data from the last problem set. Use plotly for these.

Part (A) - Regenerate your plot which addresses the second question from last time:

  1. Does the distribution of genre of sales across years appear to change?

You may copy your plot from last time, or copy my plot from the solutions, or come up with your own new plot.

library(plotly)
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# read in data
art <- read.csv("df_for_ml_improved_new_market.csv")

# create 'genre' column
art$Genre___Others[art$Genre___Painting == 1] <- 0
art$genre <- "Photography"
art$genre[art$Genre___Print == 1] <- "Print"
art$genre[art$Genre___Sculpture == 1] <- "Sculpture"
art$genre[art$Genre___Painting == 1] <- "Painting"
art$genre[art$Genre___Others == 1] <- "Other"

# create table and calculate proportions
yeargenre <- table(art$year, art$genre)
ygperc <- prop.table(yeargenre, 1)

# ensure correct order of genres
ygperc <- ygperc[, c("Painting", "Sculpture", "Photography", "Print", "Other")]

# convert to df for Plotly
ygperc_df <- as.data.frame(as.table(ygperc))
colnames(ygperc_df) <- c("Year", "Genre", "Proportion")

# create horizontal stacked bar chart
plot <- plot_ly(
  data = ygperc_df,
  x = ~Proportion,
  y = ~Year,
  color = ~Genre,
  type = "bar",
  orientation = "h",
  colors = c("pink", "lightblue", "lightgreen", "yellow", "orange")
) %>%
  layout(
    barmode = "stack",
    xaxis = list(title = "Proportion of Genre of Art Sales"),
    yaxis = list(title = "Year"),
    legend = list(title = list(text = "Genre"))
  )

plot

As shown above, the distribution of genre sales across years does appear to change. The bar chart shows the changing distribution of art sales proportions by genre from 1997 to 2012. Paintings (red) dominated in the late 1990s but steadily declined, while photography (green) rose significantly, especially after 2005. Sculptures (blue) remained relatively stable, with minor fluctuations, and prints (yellow) gained a small but noticeable increase after 2010. The “Other” category (orange) consistently contributed a minimal share. These trends highlight shifts in buyer preferences and market dynamics over the 15-year period.

Part (B) - Generate an interactive plot with plotly that can address both of these questions from last time.

  1. Is there a change in the sales price in USD over time?

  2. How does the genre affect the change in sales price over time?

This should be a single interactive plot, with which a user can manipulate the view to be able to look at change over time overall, or by genre.

Plotting first graph

library(plotly)

# read in the data
art <- read.csv("df_for_ml_improved_new_market.csv")

# function to select top values above a given percentile
select_top_values <- function(vec, percentile) {
  val <- quantile(vec, percentile)
  return(vec[vec > val])
}

# prepare data for the box plot
save <- list()
for (y in unique(art$year)) {
  prices <- art[art$year == y, "price_usd"]
  save[[as.character(y)]] <- 
    data.frame(year = y,
               price_usd = select_top_values(prices, 0.95))
}
arttop <- do.call(rbind, save)

# prepare data for the median line
artmedian <- aggregate(art$price_usd, by = list(art$year), FUN = median, na.rm = TRUE)
names(artmedian) <- c("year", "price_usd")

# create Plotly plot
plot <- plot_ly() %>%
  add_trace(
    data = arttop,
    x = ~year,
    y = ~price_usd,
    type = "box",
    name = "Top 5%",
    marker = list(symbol = "x")
  ) %>%
  add_trace(
    data = artmedian,
    x = ~year,
    y = ~price_usd,
    type = "scatter",
    mode = "lines+markers",
    line = list(dash = "dot", width = 2),
    name = "Median"
  ) %>%
  layout(
    title = "Changes in Top 5% of Prices",
    xaxis = list(title = "Year"),
    yaxis = list(
      title = "Price in Million USD",
      tickvals = seq(0, 1400000, by = 200000),
      ticktext = paste(seq(0, 1.4, 0.2), "M", sep = "")
    ),
    legend = list(x = 0.1, y = 0.9)
  )

# render the plot
plot

As shown above, there is a change in the sales price in USD over time. The boxplots reveal an upward trend in the top 5% of prices, with the interquartile range (IQR) and outliers expanding significantly in later years, particularly after 2005, indicating increasing variability and higher extreme values. The dashed line representing the median remains relatively low and stable in comparison, emphasizing the disparity between the top 5% and the overall price distribution. The chart suggests that the top 5% of prices have risen considerably over the years, with the rise becoming more pronounced in the mid-2000s.

Plotting second graph

library(plotly)

# read in the data
art <- read.csv("df_for_ml_improved_new_market.csv")

# map genre values
art$Genre___Others[art$Genre___Painting == 1] <- 0
art$genre <- "Photography"
art$genre[art$Genre___Print == 1] <- "Print"
art$genre[art$Genre___Sculpture == 1] <- "Sculpture"
art$genre[art$Genre___Painting == 1] <- "Painting"
art$genre[art$Genre___Others == 1] <- "Other"

# prepare the median and 97.5th percentile data by genre and year
artmedian <- aggregate(price_usd ~ year + genre, data = art, FUN = median, na.rm = TRUE)
art975 <- aggregate(price_usd ~ year + genre, data = art, FUN = function(x) quantile(x, 0.975, na.rm = TRUE))

# initialize Plotly figure
plot1 <- plot_ly()

# add lines for each genre for median and 97.5th percentile
genres <- unique(art$genre)
for (i in seq_along(genres)) {
  genre <- genres[i]
  
  # median line
  plot1 <- plot1 %>%
    add_trace(
      data = artmedian[artmedian$genre == genre, ],
      x = ~year,
      y = ~price_usd,
      type = "scatter",
      mode = "lines",
      line = list(color = i, width = 3),
      name = paste("Median", genre)
    )
  
  # 97.5th percentile line
  plot1 <- plot1 %>%
    add_trace(
      data = art975[art975$genre == genre, ],
      x = ~year,
      y = ~price_usd,
      type = "scatter",
      mode = "lines",
      line = list(color = i, dash = "dot", width = 3),
      name = paste("97.5% Percentile", genre)
    )
}

# add layout elements
plot1 <- plot1 %>%
  layout(
    title = "Changes in Price by Genre",
    xaxis = list(title = "Year", tickvals = seq(1997, 2012, by = 2)),
    yaxis = list(
      title = "Price in Thousands USD",
      tickvals = seq(0, 350000, by = 50000),
      ticktext = paste(seq(0, 350, by = 50), "k", sep = "")
    ),
    legend = list(x = 0.1, y = 0.9)
  )

# render the plot
plot1

The graph shows that genre impacts sales prices significantly over time. Sculpture and Painting display consistent growth in both median and 97.5th percentile prices, indicating stability and sustained value. Photography and Print, however, show sharp spikes in the 97.5th percentile, reflecting occasional high-value sales and greater volatility. The “Other” category remains the lowest in price with minimal growth.

Generating an interactive plot

# combine the two plot objects into one
combined_graph <- plot_ly() |>
  # add traces from the first plot (Top 5% boxplot data)
  add_trace(
    data = arttop, # dataset for Top 5% prices
    x = ~year,
    y = ~price_usd,
    type = "box",
    name = "Top 5%",
    visible = TRUE
  ) |>
  add_trace(
    data = artmedian, # use `artmedian` for the median line
    x = ~year,
    y = ~price_usd,
    type = "scatter",
    mode = "lines+markers",
    line = list(dash = "dot", width = 2),
    name = "Median",
    visible = TRUE
  ) |>
  # add traces from the second plot (Genre-based scatter plot)
  add_trace(
    data = artmedian, # median data for genre-based plot
    x = ~year,
    y = ~price_usd,
    type = "scatter",
    mode = "lines",
    color = ~genre,
    name = "Genre Median",
    visible = FALSE
  ) |>
  add_trace(
    data = art975, # 97.5th percentile data for genre-based plot
    x = ~year,
    y = ~price_usd,
    type = "scatter",
    mode = "lines",
    line = list(dash = "dot"),
    color = ~genre,
    name = "97.5% Percentile",
    visible = FALSE
  )

# add layout and dropdown menu
combined_graph <- combined_graph |>
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Price (USD)"),
    updatemenus = list(
      list(
        y = 1,
        buttons = list(
          list(
            method = "update",
            args = list(
              list(visible = c(TRUE, TRUE, FALSE, FALSE)), # Show Top 5% and Median traces
              list(title = "Top 5% of Sales Price of Art over the Years")
            ),
            label = "Overall"
          ),
          list(
            method = "update",
            args = list(
              list(visible = c(FALSE, FALSE, TRUE, TRUE)), # Show genre-based traces
              list(title = "Change in Art Price over the Years by Genre")
            ),
            label = "By Genre"
          )
        )
      )
    )
  )

# render combined graph
combined_graph

As shown above, there is a change in the sales price in USD over time.

Problem 3 - data.table

Part (A) - Generate a table (which can just be a nicely printed tibble) reporting the mean and median departure delay per airport. Generate a second table (which again can be a nicely printed tibble) reporting the mean and median arrival delay per airport. Exclude any destination with under 10 flights. Do this exclusion through code, not manually.

Additionally,

Order both tables in descending mean delay. Both tables should use the airport names not the airport codes. Both tables should print all rows.

library(nycflights13)
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
# convert flights to a data.table
flights <- data.table(flights)

# merge flights with airports by the "faa" column
merged <- merge(flights[, faa := origin],
                airports,
                by = "faa",
                all.x = TRUE)

# calculate statistics and filter results
merged[, .(N = .N,
           mean_delay = mean(dep_delay, na.rm = TRUE),
           med_delay = median(dep_delay, na.rm = TRUE)),
       by = name] |>
  (\(x) x[N >= 10, !"N"])() |>
  (\(x) x[order(mean_delay, decreasing = TRUE)])()
                  name mean_delay med_delay
                <char>      <num>     <num>
1: Newark Liberty Intl   15.10795        -1
2: John F Kennedy Intl   12.11216        -1
3:          La Guardia   10.34688        -3
# convert flights to a data.table
flights <- data.table(flights)

# merge flights with airports by the "faa" column, using "dest" as the faa
merged <- merge(flights[, faa := dest],
                airports,
                by = "faa",
                all.x = TRUE)

# calculate statistics and filter the results
merged[, .(
    name = ifelse(is.na(first(name)), first(faa), first(name)),
    N = .N,
    mean_delay = mean(arr_delay, na.rm = TRUE),
    med_delay = median(arr_delay, na.rm = TRUE)
), by = faa] |>
  (\(x) x[N >= 10, !c("faa", "N")])() |>
  (\(x) x[order(mean_delay, decreasing = TRUE)])() |>
  (\(x) print(x, nrows = 10000))()
                                     name   mean_delay med_delay
                                   <char>        <num>     <num>
  1:                Columbia Metropolitan  41.76415094      28.0
  2:                           Tulsa Intl  33.65986395      14.0
  3:                    Will Rogers World  30.61904762      16.0
  4:                 Jackson Hole Airport  28.09523810      15.0
  5:                        Mc Ghee Tyson  24.06920415       2.0
  6:               Dane Co Rgnl Truax Fld  20.19604317       1.0
  7:                        Richmond Intl  20.11125320       1.0
  8:        Akron Canton Regional Airport  19.69833729       3.0
  9:                      Des Moines Intl  19.00573614       0.0
 10:                   Gerald R Ford Intl  18.18956044       1.0
 11:                      Birmingham Intl  16.87732342      -2.0
 12:         Theodore Francis Green State  16.23463687       1.0
 13: Greenville-Spartanburg International  15.93544304      -0.5
 14:    Cincinnati Northern Kentucky Intl  15.36456376      -3.0
 15:            Savannah Hilton Head Intl  15.12950601      -1.0
 16:          Manchester Regional Airport  14.78755365      -3.0
 17:                          Eppley Afld  14.69889841      -2.0
 18:                               Yeager  14.67164179      -1.5
 19:                     Kansas City Intl  14.51405836       0.0
 20:                          Albany Intl  14.39712919      -4.0
 21:                General Mitchell Intl  14.16722038       0.0
 22:                       Piedmont Triad  14.11260054      -2.0
 23:               Washington Dulles Intl  13.86420212      -3.0
 24:               Cherry Capital Airport  12.96842105     -10.0
 25:              James M Cox Dayton Intl  12.68048606      -3.0
 26:     Louisville International Airport  12.66938406      -2.0
 27:                  Chicago Midway Intl  12.36422360      -1.0
 28:                      Sacramento Intl  12.10992908       4.0
 29:                    Jacksonville Intl  11.84483416      -2.0
 30:                       Nashville Intl  11.81245891      -2.0
 31:                Portland Intl Jetport  11.66040210      -4.0
 32:               Greater Rochester Intl  11.56064461      -5.0
 33:      Hartsfield Jackson Atlanta Intl  11.30011285      -1.0
 34:                Lambert St Louis Intl  11.07846451      -3.0
 35:                         Norfolk Intl  10.94909344      -4.0
 36:            Baltimore Washington Intl  10.72673385      -5.0
 37:                         Memphis Intl  10.64531435      -2.5
 38:                   Port Columbus Intl  10.60132291      -3.0
 39:                  Charleston Afb Intl  10.59296847      -4.0
 40:                    Philadelphia Intl  10.12719014      -3.0
 41:                  Raleigh Durham Intl  10.05238095      -3.0
 42:                    Indianapolis Intl   9.94043412      -3.0
 43:            Charlottesville-Albemarle   9.50000000      -5.0
 44:               Cleveland Hopkins Intl   9.18161129      -5.0
 45:        Ronald Reagan Washington Natl   9.06695204      -2.0
 46:                      Burlington Intl   8.95099602      -4.0
 47:                 Buffalo Niagara Intl   8.94595186      -5.0
 48:                Syracuse Hancock Intl   8.90392501      -5.0
 49:                          Denver Intl   8.60650021      -2.0
 50:                      Palm Beach Intl   8.56297210      -3.0
 51:                                  BQN   8.24549550      -1.0
 52:                             Bob Hope   8.17567568      -3.0
 53:       Fort Lauderdale Hollywood Intl   8.08212154      -3.0
 54:                          Bangor Intl   8.02793296      -9.0
 55:           Asheville Regional Airport   8.00383142      -1.0
 56:                                  PSE   7.87150838       0.0
 57:                      Pittsburgh Intl   7.68099053      -5.0
 58:                       Gallatin Field   7.60000000      -2.0
 59:                 NW Arkansas Regional   7.46572581      -2.0
 60:                           Tampa Intl   7.40852503      -4.0
 61:               Charlotte Douglas Intl   7.36031885      -3.0
 62:             Minneapolis St Paul Intl   7.27016886      -5.0
 63:                      William P Hobby   7.17618819      -4.0
 64:                         Bradley Intl   7.04854369     -10.0
 65:                     San Antonio Intl   6.94537178      -9.0
 66:                      South Bend Rgnl   6.50000000      -3.5
 67:     Louis Armstrong New Orleans Intl   6.49017497      -6.0
 68:                        Key West Intl   6.35294118       7.0
 69:                        Eagle Co Rgnl   6.30434783      -4.0
 70:                Austin Bergstrom Intl   6.01990875      -5.0
 71:                   Chicago Ohare Intl   5.87661475      -8.0
 72:                         Orlando Intl   5.45464309      -5.0
 73:               Detroit Metro Wayne Co   5.42996346      -7.0
 74:                        Portland Intl   5.14157973      -5.0
 75:                        Nantucket Mem   4.85227273      -3.0
 76:                      Wilmington Intl   4.63551402      -7.0
 77:                    Myrtle Beach Intl   4.60344828     -13.0
 78:    Albuquerque International Sunport   4.38188976      -5.5
 79:         George Bush Intercontinental   4.24079040      -5.0
 80:        Norman Y Mineta San Jose Intl   3.44817073      -7.0
 81:               Southwest Florida Intl   3.23814963      -5.0
 82:                       San Diego Intl   3.13916574      -5.0
 83:              Sarasota Bradenton Intl   3.08243131      -5.0
 84:            Metropolitan Oakland Intl   3.07766990      -9.0
 85:   General Edward Lawrence Logan Intl   2.91439222      -9.0
 86:                   San Francisco Intl   2.67289152      -8.0
 87:                                  SJU   2.52052659      -6.0
 88:                         Yampa Valley   2.14285714       2.0
 89:              Phoenix Sky Harbor Intl   2.09704733      -6.0
 90:            Montrose Regional Airport   1.78571429     -10.5
 91:                     Los Angeles Intl   0.54711094      -7.0
 92:               Dallas Fort Worth Intl   0.32212685      -9.0
 93:                           Miami Intl   0.29905978      -9.0
 94:                       Mc Carran Intl   0.25772849      -8.0
 95:                  Salt Lake City Intl   0.17625459      -8.0
 96:                           Long Beach  -0.06202723     -10.0
 97:                Martha\\\\'s Vineyard  -0.28571429     -11.0
 98:                  Seattle Tacoma Intl  -1.09909910     -11.0
 99:                        Honolulu Intl  -1.36519258      -7.0
100:                                  STT  -3.83590734      -9.0
101:            John Wayne Arpt Orange Co  -7.86822660     -11.0
102:                    Palm Springs Intl -12.72222222     -13.5
                                     name   mean_delay med_delay

Part (B) - How many flights did the aircraft model with the fastest average speed take? Produce a tibble with 1 row, and entries for the model, average speed (in MPH) and number of flights.

library(data.table)

# Convert data.frames to data.tables
flights_dt <- as.data.table(flights)
planes_dt <- as.data.table(planes)

# using data.table
result <- flights_dt[
  planes_dt, 
  on = "tailnum"  # Join by "tailnum"
][
  , mph := distance / (air_time / 60)  # Calculate mph
][
  # calculate avg_mph and num_flights
  , .(avg_mph = mean(mph, na.rm = TRUE), num_flights = .N), by = model
][
  order(-avg_mph)  # order by avg_mph descending
][
  1  # select first row
]

print(result)
     model  avg_mph num_flights
    <char>    <num>       <int>
1: 777-222 482.6254           4

As shown above, the aircraft model with the fastest average speed took 4 flights.